.TH "Slurm API" "3" "Slurm checkpoint functions" "April 2015" "Slurm checkpoint functions"

.SH "NAME"
slurm_checkpoint_able, slurm_checkpoint_complete, slurm_checkpoint_create,
slurm_checkpoint_disable, slurm_checkpoint_enable, slurm_checkpoint_error,
slurm_checkpoint_restart, slurm_checkpoint_tasks, slurm_checkpoint_vacate \- Slurm checkpoint functions

.SH "SYNTAX"
.LP
#include <slurm/slurm.h>
.LP
.LP
int \fBslurm_checkpoint_able\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP,
.br
	time_t *\fIstart_time\fP
.br
);
.LP
int \fBslurm_checkpoint_complete\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP,
.br
	time_t \fIstart_time\fP,
.br
	uint32_t \fIerror_code\fP,
.br
	char *\fIerror_msg\fP
.br
);
.LP
int \fBslurm_checkpoint_create\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP,
.br
	uint16_t \fImax_wait\fP,
.br
	char *\fIimage_dir\fP
.br
);
.LP
int \fBslurm_checkpoint_disable\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP
.br
);
.LP
int \fBslurm_checkpoint_enable\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP
.br
);
.LP
int \fBslurm_checkpoint_error\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP,
.br
	uint32_t *\fIerror_code\fP,
.br
	char **\fIerror_msg\fP
.br
);
.LP
int \fBslurm_checkpoint_restart\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP,
.br
	uint16_t \fIstick\fP,
.br
	char *\fIimage_dir\fP
.br
);
.LP
.LP
int \fBslurm_checkpoint_tasks\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP,
.br
	time_t \fIbegin_time\fP,
.br
	char *\fIimage_dir\fP,
.br
	uint16_t \fImax_wait\fP,
.br
	char *\fInodelist\fP
.br
);
.LP
int \fBslurm_checkpoint_vacate\fR (
.br
	uint32_t \fIjob_id\fP,
.br
	uint32_t \fIstep_id\fP,
.br
	uint16_t \fImax_wait\fP,
.br
	char *\fIimage_dir\fP
.br
);

.SH "ARGUMENTS"
.LP
.TP
\fIbegin_time\fP
When to begin the operation.
.TP
\fIerror_code\fP
Error code for checkpoint operation. Only the highest value is preserved.
.TP
\fIerror_msg\fP
Error message for checkpoint operation. Only the \fIerror_msg\fP value for the highest
\fIerror_code\fP is preserved.
.TP
\fIimage_dir\fP
Directory specification for where the checkpoint file should be read from or
written to. The default value is specified by the \fIJobCheckpointDir\fP
Slurm configuration parameter.
.TP
\fIjob_id\fP
Slurm job ID to perform the operation upon.
.TP
\fImax_wait\fP
Maximum time to allow for the operation to complete in seconds.
.TP
\fInodelist\fP
Nodes to which the request is sent.
.TP
\fIstart_time\fP
Time at which last checkpoint operation began (if one is in progress), otherwise zero.
.TP
\fIstep_id\fP
Slurm job step ID to perform the operation upon.
May be NO_VAL if the operation is to be performed on all steps of the specified job.
Specify SLURM_BATCH_SCRIPT to checkpoint a batch job.
.TP
\fIstick\fP
If non\-zero then restart the job on the same nodes that it was checkpointed from.

.SH "DESCRIPTION"
.LP
\fBslurm_checkpoint_able\fR
Report if checkpoint operations can presently be issued for the specified job step.
If yes, returns SLURM_SUCCESS and sets \fIstart_time\fP if checkpoint operation is
presently active. Returns ESLURM_DISABLED if checkpoint operation is disabled.
.LP
\fBslurm_checkpoint_complete\fR
Note that a requested checkpoint has been completed.
.LP
\fBslurm_checkpoint_create\fR
Request a checkpoint for the identified job step.
Continue its execution upon completion of the checkpoint.
.LP
\fBslurm_checkpoint_disable\fR
Make the identified job step non\-checkpointable.
This can be issued as needed to prevent checkpointing while
a job step is in a critical section or for other reasons.
.LP
\fBslurm_checkpoint_enable\fR
Make the identified job step checkpointable.
.LP
\fBslurm_checkpoint_error\fR
Get error information about the last checkpoint operation for a given job step.
.LP
\fBslurm_checkpoint_restart\fR
Request that a previously checkpointed job resume execution.
It may continue execution on different nodes than were
originally used.
Execution may be delayed if resources are not immediately
available.
.LP
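\fBslurm_checkpoint_tasks\fR
Send a checkpoint request to the tasks of the identified job step
on the nodes in \fInodelist\fP.
.LP
\fBslurm_checkpoint_vacate\fR
Request a checkpoint for the identified job step.
Terminate its execution upon completion of the checkpoint.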


.SH "RETURN VALUE"
.LP
Zero is returned upon success.
On error, \-1 is returned, and the Slurm error code is set appropriately.
.SH "ERRORS"
.LP
\fBESLURM_INVALID_JOB_ID\fR the requested job or job step ID does not exist.
.LP
\fBESLURM_ACCESS_DENIED\fR the requesting user lacks authorization for the requested
action (e.g. trying to delete or modify another user's job).
.LP
\fBESLURM_JOB_PENDING\fR the requested job is still pending.
.LP
\fBESLURM_ALREADY_DONE\fR the requested job has already completed.
.LP
\fBESLURM_DISABLED\fR the requested operation has been disabled for this job step.
This will occur when a checkpoint is requested for a job step on which checkpointing has been disabled.
.LP
\fBESLURM_NOT_SUPPORTED\fR the requested operation is not supported on this system.

.SH "EXAMPLE"
.LP
#include <stdio.h>
.br
#include <stdlib.h>
.br
#include <slurm/slurm.h>
.br
#include <slurm/slurm_errno.h>
.LP
int main (int argc, char *argv[])
.br
{
.br
	uint32_t job_id, step_id;
.LP
	if (argc < 3) {
.br
		printf("Usage: %s job_id step_id\\n", argv[0]);
.br
		exit(1);
.br
	}
.LP
	job_id = atoi(argv[1]);
.br
	step_id = atoi(argv[2]);
.br
	if (slurm_checkpoint_disable(job_id, step_id)) {
.br
		slurm_perror ("slurm_checkpoint_disable:");
.br
		exit (1);
.br
	}
.br
	exit (0);
.br
}

.SH "NOTE"
These functions are included in the libslurm library,
which must be linked to your process for use
(e.g. "cc myprog.c \-lslurm").

.SH "COPYING"
Copyright (C) 2004\-2007 The Regents of the University of California.
Copyright (C) 2008\-2009 Lawrence Livermore National Security.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
CODE\-OCEC\-09\-009. All rights reserved.
.LP
This file is part of Slurm, a resource management program.
For details, see <https://slurm.schedmd.com/>.
.LP
Slurm is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
.LP
Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

.SH "SEE ALSO"
.LP
\fBsrun\fR(1), \fBsqueue\fR(1), \fBfree\fR(3), \fBslurm.conf\fR(5)
